rm(list=ls(all=TRUE))
library(foreign)
library(plyr)
library(reshape2)
library(ggplot2)
library(stringr)
library(doBy)
library(stringr)
library(ggplot2)
library(ggmap)
library(maps)
library(RgoogleMaps)
library(maptools)
library(foreign)
library(grid)
library(rgdal)

# Load the classifier output. The file is read without a header, so the
# columns get read.table()'s default names V1, V2 and V3.
# NOTE(review): V1 looks like the image file name, V2 the predicted label and
# V3 the text carrying that label's probability -- confirm against the file.
data1 <- read.table("~/classification.txt", sep = ",")

# cleaning the file
# result: 1 when the predicted label is the literal string "[[1]]", else 0.
data1$result <- 0
data1$result[data1$V2 == "[[1]]"] <- 1

# prob: drop the first space-separated word of V3, strip the last two
# characters of what remains, and parse the rest as a number.
data1$prob <- word(data1$V3, 2, -1)
data1$prob <- as.numeric(str_sub(data1$prob, 1, -3))
data1$result <- as.numeric(data1$result)

# probfraud: prob refers to the predicted label, so keep it when the label is
# 1 and take the complement when the label is 0. Vectorised so that it also
# works on an empty data frame (a 1:nrow() loop would iterate over c(1, 0)).
data1$probfraud <- ifelse(data1$result == 1, data1$prob, 1 - data1$prob)

# I take a conservative approach in which I only classify an image as fraudulent when its probability of being so is more than twice as large as 
# its probability of not being fraudulent, i.e. when the probability of fraud exceeds 2/3.
# Rebuild the binary label from probfraud: 1 strictly above the cut-off,
# 0 otherwise (rows with a missing probfraud stay at 0).
fraud_cutoff <- .6666666666
data1$result <- as.numeric(
  !is.na(data1$probfraud) & data1$probfraud > fraud_cutoff
)

# V1 is treated as underscore-delimited: everything before the first "_" is
# the state, the second field is the district.
data1$state <- sub("_.*", "\\1", data1$V1)
data1$district <- word(data1$V1, 2, sep = fixed("_"))

# Working copy of the district label; district itself is left as parsed.
data1$district1 <- data1$district

# Convert roman-numeral district labels (I .. XL) in district1 to their arabic
# value, stored as text ("1" .. "40") because district1 is a character column.
# Labels that are not one of these 40 numerals (including NA) are left as is.
#
# A single lookup table replaces forty hand-written assignments; the previous
# version compared against "XXXVII" twice, so "XXXVIII" was never mapped to 38.
roman_districts <- as.character(utils::as.roman(1:40))
district_idx <- match(data1$district1, roman_districts)
is_roman <- !is.na(district_idx)
data1$district1[is_roman] <- as.character(district_idx[is_roman])

# state1: copy of state with "Estado de Mexico" shortened to "Edomex".
data1$state1 <- data1$state
data1$state1[data1$state1 == "Estado de Mexico"] <- "Edomex"

# Constant 1 per row so that its per-state sum is the number of rows (tallies).
data1$casilla <- 1


# Per-state totals: result.sum = rows flagged as altered, casilla.sum = rows.
collapse1 <- summaryBy(result + casilla ~ state1, FUN = c(sum), data = data1)
collapse1$rate <- collapse1$result.sum / collapse1$casilla.sum


# Sequential row id (1, 2, ...) used later as the merge key with the map data.
# Derived from the actual number of rows instead of a hard-coded 32, which
# errored with fewer groups and left id 0 on any group beyond the 32nd.
# NOTE(review): this relies on the row order of collapse1 matching the polygon
# order of the shapefile -- confirm.
collapse1$id <- as.numeric(seq_len(nrow(collapse1)))

# Load the vote returns.
data <- read.csv("~/VoteReturns.csv")

# district: roman-numeral label for district numbers 1 .. 40 held in dto.
# Rows whose dto is not one of 1 .. 40 are not assigned.
roman_numerals <- as.character(utils::as.roman(1:40))
dto_idx <- match(data$dto, 1:40)
has_label <- !is.na(dto_idx)
data$district[has_label] <- roman_numerals[dto_idx[has_label]]

# Work on a character copy of the state column.
data$eedo <- as.character(data$edo)

# Recode two state spellings before building the key.
# NOTE(review): presumably so they agree with the state names embedded in the
# classifier's V1 values -- confirm.
data$eedo[data$eedo == "Chihuhua"] <- "Chihuahua"
data$eedo[data$eedo == "Edomex"] <- "Estado de Mexico"


# Merge key in the form <state>_<district>_<foto>, with the image extension
# removed. fixed() matches ".JPG" / ".jpg" literally; as a plain regex the dot
# would match any character (e.g. "xJPG").
data$V1 <- paste0(data$eedo, "_", data$district, "_", data$foto)
data$V1 <- str_replace(data$V1, fixed(".JPG"), "")
data$V1 <- str_replace(data$V1, fixed(".jpg"), "")


# Same extension stripping on the classifier side of the key.
data1$V1 <- str_replace(data1$V1, fixed(".JPG"), "")
data1$V1 <- str_replace(data1$V1, fixed(".jpg"), "")

data <- data[order(data$V1), ]
data1 <- data1[order(data1$V1), ]

# Inner join: only keys present in both tables are kept.
d <- merge(data, data1, by = "V1")

#######################################################
#######################################################
###################### Figure 3 #######################
#######################################################
#######################################################

# Read the state boundaries, reproject to WGS84 longitude/latitude and flatten
# them into a data frame of polygon vertices.
States <- readOGR("~/mexstates/Mex_adm/", "MEX_adm1")
States <- spTransform(States, CRS("+proj=longlat +datum=WGS84"))
States <- fortify(States)
# Shift the polygon id by one so that it lines up with collapse1$id (1, 2, ...).
# NOTE(review): assumes fortify() ids start at 0 -- confirm.
States$id <- as.numeric(States$id) + 1


# Attach the per-state alteration rate; merge() joins on the shared column(s).
States <- merge(States, collapse1)
# merge() does not promise to keep the vertex sequence within a state, and
# geom_polygon()/geom_path() connect vertices in row order, so restore it.
States <- States[order(States$id, States$order), ]

# Flag for the polygons with id 3 (not used by the plot below).
States$check <- 0
States$check[States$id == 3] <- 1

# Black-and-white satellite base map, cropped to the plotting window.
mapita <- ggmap(
  get_map("Aguascalientes", zoom = 4, color = "bw", maptype = c("satellite"))
) +
  scale_x_continuous(limits = c(-119, -85), expand = c(0, 0)) +
  scale_y_continuous(limits = c(14, 33), expand = c(0, 0))

# Choropleth of the alteration rate with white state outlines on top; all axis
# and panel decoration is removed.
mapita +
  geom_polygon(
    aes(fill = rate, x = long, y = lat, group = group),
    data = States,
    alpha = 0.3,
    size = 0
  ) +
  geom_path(
    data = States,
    aes(x = long, y = lat, group = group),
    color = "white",
    alpha = 0.5
  ) +
  scale_fill_distiller(
    palette = "YlOrRd",
    values = c(1, 0),
    name = "Proportion of\naltered\ntallies"
  ) +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_blank()
  )

# Total votes per tally across the five candidates, plus a row counter.
d$vtotal <- d$salinas + d$cardenas + d$clouthier + d$castillo + d$ibarra
d$casilla_y <- 1

# State-level sums, then Salinas' vote share and the share of altered tallies.
meanvotes <- summaryBy(
  salinas + vtotal + casilla_y + result ~ edo,
  FUN = c(sum),
  data = d
)
meanvotes$psalinas <- meanvotes$salinas.sum / meanvotes$vtotal.sum
meanvotes$rate <- meanvotes$result.sum / meanvotes$casilla_y.sum
# Constant reference values, repeated on every row, for the national marker.
meanvotes$nationalvotes <- 0.5105125
meanvotes$nationalrate <- 0.3314

# State labels positioned by (alteration rate, Salinas share). The reference
# point is drawn once per row at very low alpha, so the overlaps add up.
g <- ggplot(meanvotes, aes(rate, psalinas, label = edo))
g +
  geom_text(size = 8, alpha = .8) +
  xlim(-.05, .8) +
  ylim(0.15, .95) +
  theme_bw() +
  xlab("Proportion of tallies with alterations") +
  ylab("Vote share for Salinas") +
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    strip.text.y = element_text(size = 24),
    plot.title = element_text(size = 24, face = "bold", vjust = 2),
    legend.title = element_blank()
  ) +
  geom_point(aes(nationalrate, nationalvotes), size = 24, alpha = .025)


#######################################################
#######################################################
###################### Figure 4 #######################
#######################################################
#######################################################

# Total votes per tally and the vote shares of the three plotted candidates.
d$vtotal <- d$salinas + d$clouthier + d$ibarra + d$castillo + d$cardenas

d$psalinas <- d$salinas / d$vtotal
d$pcardenas <- d$cardenas / d$vtotal
d$pclouthier <- d$clouthier / d$vtotal

# Long format: one row per (tally, candidate share). melt() stacks the three
# share columns one after another, so result is repeated three times to match.
share_cols <- c("psalinas", "pclouthier", "pcardenas")
m <- melt(d[c(share_cols, "V1")])
m$result <- rep(d$result, 3)

# Display name for each candidate column.
candidate_labels <- c(
  psalinas = "Salinas (PRI)",
  pclouthier = "Clouthier (PAN)",
  pcardenas = "Cárdenas (FDN)"
)
m$variable1 <- unname(candidate_labels[as.character(m$variable)])

# Legend label for the classification outcome (NA when result is neither).
class_labels <- c(
  "0" = "Tallies with\nno alterations",
  "1" = "Tallies with\nalterations"
)
m$class <- unname(class_labels[as.character(m$result)])

# Fix the facet order: Salinas, Cárdenas, Clouthier.
m$variable2 <- factor(
  m$variable1,
  levels = c("Salinas (PRI)", "Cárdenas (FDN)", "Clouthier (PAN)")
)

q1 <- ggplot(
  m,
  aes(value, linetype = class, color = class, size = class, fill = class)
)

# Density of vote shares by classification outcome, one facet per candidate.
q1 +
  geom_density(alpha = .5) +
  facet_grid(variable2 ~ ., scales = "free_y") +
  xlim(0, 1) +
  theme_bw() +
  xlab("Vote share") +
  ylab("Density") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    legend.text = element_text(size = 12),
    strip.text.y = element_text(size = 18),
    plot.title = element_text(size = 18, face = "bold", vjust = 2),
    legend.key = element_rect(size = 5),
    legend.key.size = unit(2, "lines"),
    legend.title = element_blank()
  ) +
  scale_color_manual(values = c("red", "blue")) +
  scale_size_manual(values = c(1, 1)) +
  scale_linetype_manual(values = c("dotted", "solid"))

